# -------------------------------------------------------------------
# Clean Data Files For Main Plots
# Date:
#       December 19, 2024
# -------------------------------------------------------------------

# -------------------------------------------------------------------
# Prepare Projections Data for Population Aged 60+ by Year
# -------------------------------------------------------------------
# Long-format 60+ population projections (1990, 2020, 2050) restricted to
# the entries of `countries`: one row per countryregion and year.
proj_pop <- pop_pro %>%
  select(countryregion, pop_60plus_1990, pop_60plus_2020, pop_60plus_2050) %>%
  rename(
    `1990` = pop_60plus_1990,
    `2020` = pop_60plus_2020,
    `2050` = pop_60plus_2050
  ) %>%
  # Only columns whose name starts with "19" (here: `1990`) are pre-rounded.
  # An explicit function is used because forwarding extra arguments through
  # across()'s `...` is deprecated since dplyr 1.1.0.
  mutate(across(starts_with("19"), function(x) round(x, 0))) %>%
  # gather() is kept deliberately: it stacks the year columns one after the
  # other, and downstream code may depend on that row order.
  gather(measurement, value, `1990`:`2050`, factor_key = TRUE) %>%
  filter(countryregion %in% countries) %>%
  mutate(
    # Divide by 1000 and round to whole numbers.
    value = round(value / 1000, 0),
    # Ordered factor whose levels are every element of `breaks_text` except
    # the last one.
    measurement = factor(
      measurement,
      levels = breaks_text[-length(breaks_text)],
      ordered = TRUE
    )
  )

# -------------------------------------------------------------------
# Clean and Reshape Total Population Data
# -------------------------------------------------------------------

# Total population for 1990/2020/2050/2100, limited to `keepobs`, with a
# "rest of world" aggregate added and then reshaped by the project helpers.
totpop <- pop_pro %>%
  select(
    countryregion,
    all_of(paste0("pop_total_", c(1990, 2020, 2050, 2100)))
  ) %>%
  filter(countryregion %in% keepobs) %>%
  add_rest_of_world("pop_total") %>%
  reshape_and_clean("pop_total", keepobs, order)

# Attach the externally defined label vectors as columns of `totpop`
# (`name_lab`, `measurement_x` and `lab` come from outside this section).
totpop$name_lab <- name_lab
totpop$measurement_x <- measurement_x
totpop$lab <- lab

# -------------------------------------------------------------------
# Clean and Reshape Population Aged 60+ Data
# -------------------------------------------------------------------

# Population aged 60+ for 1990/2020/2050/2100, limited to `keepobs`, with a
# "rest of world" aggregate added and then reshaped by the project helpers.
over60 <- pop_pro %>%
  select(
    countryregion,
    all_of(paste0("pop_60plus_", c(1990, 2020, 2050, 2100)))
  ) %>%
  filter(countryregion %in% keepobs) %>%
  add_rest_of_world("pop_60plus") %>%
  reshape_and_clean("pop_60plus", keepobs, order)

# Hand-written plot labels, positionally matched to the 16 rows of `over60`.
# NOTE(review): the layout below assumes four rows per block with the last
# row of each block left unlabelled -- confirm against reshape_and_clean().
over60$name_lab <- c(
  NA, NA, NA, NA,
  "+ 171 %", "+ 155 %", "+ 121 %", NA,
  "+ 100 %", "+ 144 %", "+ 194 %", NA,
  "- 29 %", "+ 59 %", "+ 313 %", NA
)
over60$lab <- c(
  "94 M ", "56 M ", "24 M", NA,
  NA, NA, NA, NA,
  NA, NA, NA, NA,
  "362 M", "552 M", "644 M", NA
)

# -------------------------------------------------------------------
# Process Dependency Ratios for Over-60 Population
# -------------------------------------------------------------------

# Dependency ratios built from the "depratio_15to59_" columns of `pop_pro`.
dep_rat_ov60 <- process_dependency_ratio(
  pop_pro, "depratio_15to59_", coun_comp
)

# Old-age dependency ratios built from the "oldagedepratio_15to59_" columns.
# The 'Sub-Saharan Africa' label is replaced by a two-line version ending in
# "Avg."; every other label is left as is.
old_age_rat_ov60 <- pop_pro %>%
  process_dependency_ratio("oldagedepratio_15to59_", coun_comp) %>%
  mutate(
    name_lab = if_else(
      name_lab == "Sub-Saharan Africa",
      "Sub-Saharan Africa\n             Avg.",
      name_lab
    )
  )

# -------------------------------------------------------------------
# Process Dependency Ratios for Over-65 Population (APPENDIX DATASET)
# -------------------------------------------------------------------

# Dependency ratios built from the "depratio_15to64_" columns of `pop_pro`.
dep_rat_ov65 <- process_dependency_ratio(
  pop_pro, "depratio_15to64_", coun_comp
)

# Old-age dependency ratios built from the "oldagedepratio_15to64_" columns.
# The 'Sub-Saharan Africa' label is replaced by a two-line version ending in
# "Avg."; every other label is left as is.
old_age_rat_ov65 <- pop_pro %>%
  process_dependency_ratio("oldagedepratio_15to64_", coun_comp) %>%
  mutate(
    name_lab = if_else(
      name_lab == "Sub-Saharan Africa",
      "Sub-Saharan Africa\n             Avg.",
      name_lab
    )
  )


# -------------------------------------------------------------------
# Define Labels for Stacked Plots
# -------------------------------------------------------------------

# Text annotations for the stacked plots: one row per entry of `order`
# (reversed), with hard-coded coordinates and a text colour per row.
stacked_labels <- local({
  # Reverse the externally defined `order` vector once and use it for both
  # the label text and the countryregion key.
  reversed_order <- rev(order)

  data.frame(
    labels = reversed_order,
    countryregion = reversed_order,
    condition = c(2060, 2069, 2070, 2055),
    measurement = c(2670000, 5300000, 6600000, 8600000),
    color = c("black", "white", "white", "black")
  )
})

# -------------------------------------------------------------------
# Clean SSA Dataset and Create Variables
# -------------------------------------------------------------------

# Recode and derive variables on the `ssa` data set.
ssa <- ssa %>%
  mutate(
    # Keep an untouched copy of the original `female` values.
    gen = female,
    across(c(female, age_group5), as.factor)
  ) %>%
  recode_values("female", c(`0` = "Male", `1` = "Female")) %>%
  recode_values("country", country_mapping) %>%
  mutate(
    # Countries that appear as names in `region_mapping` get the mapped
    # region; everything else is labelled 'Other'.
    region = ifelse(
      country %in% names(region_mapping),
      region_mapping[country],
      "Other"
    ),
    # Values above 168 become NA, values from 140 up to 168 are capped at
    # 140, and all remaining values (including NA) pass through unchanged.
    work_hrs = case_when(
      work_hrs > 168 ~ NA_real_,
      work_hrs >= 140 ~ 140,
      TRUE ~ work_hrs
    ),
    # `depressed` is blanked out for Nigeria.
    depressed = ifelse(country == "Nigeria", NA, depressed),
    gender = as.factor(female)
  )

# -------------------------------------------------------------------
# Data Transformation for Outcome Analysis
# -------------------------------------------------------------------

# Keep every column listed in `var_sum` except the last one, sort by
# `govtexpgdp` (descending), flag rows whose country is in `countries`, and
# replace `oopexp` by its complement to 100.
sum_ind <- sum_ind %>%
  select(head(var_sum, -1)) %>%
  arrange(desc(govtexpgdp)) %>%
  mutate(cou = ifelse(country %in% countries, 1, 0)) %>%
  mutate(oopexp = 100 - oopexp)

# Long-format version, built only from the flagged rows.
sum_ind_lon <- prep_data_long(filter(sum_ind, cou != 0), countries)

# One table per income group, each using the same set of variables
# (`indicators` followed by `var_hea`).
high_inco <- process_income_group(
  sum_ind, "HICs", c(indicators, var_hea)
)
low_middle_inco <- process_income_group(
  sum_ind, "LMICs_nonSSA", c(indicators, var_hea)
)
sub_sahar_inco <- process_income_group(
  sum_ind, "Sub-Saharan Africa", c(indicators, var_hea)
)


# -------------------------------------------------------------------
# Combine Sub-Saharan Africa Average with other countries
# -------------------------------------------------------------------
# Tag the existing `ssa_wgt` rows with country = 'ssa', move `country` to the
# front, append the rows of `cou_wgt`, and sort by age group then country.
ssa_wgt <- ssa_wgt %>%
  mutate(country = "ssa") %>%
  select(country, everything()) %>%
  rbind(select(cou_wgt, country, everything())) %>%
  arrange(age_group5, country)

# Tag the existing `ssa_nwg` rows with country = 'ssa', move `country` to the
# front, append the rows of `cou_nwg`, and sort by age group then country.
ssa_nwg <- ssa_nwg %>%
  mutate(country = "ssa") %>%
  select(country, everything()) %>%
  rbind(select(cou_nwg, country, everything())) %>%
  arrange(age_group5, country)

# -------------------------------------------------------------------
# Prepare Data for Figure 2C
# -------------------------------------------------------------------

# Figure 2C plots total population without the 'Rest of World' aggregate.
fig2c_data <- totpop %>% filter(countryregion != "Rest of World")

# Label positions for Figure 2C. The columns are computed in order, so
# `measurement` and `namecountry` below see the already-remapped `condition`.
fig2c_labels <- fig2c_data %>% mutate(
  # x position of each label: 2020 -> 2005, 2050 -> 2035, 2100 -> 2075;
  # 1990 rows (and any other value) get no label position.
  # NA_real_ keeps the result numeric on every dplyr version.
  condition = case_when(
    condition == 1990 ~ NA_real_,
    condition == 2020 ~ 2005,
    condition == 2050 ~ 2035,
    condition == 2100 ~ 2075
  ),
  # y position of each label, looked up by its text.
  # The label '- 8 %' occurs on two rows. case_when() takes the first
  # matching branch, so matching on the text alone left the second position
  # (1068783) unreachable and drew both labels at 1600170. The two rows are
  # now told apart by the remapped `condition`.
  # NOTE(review): 1600170 is assumed to belong to the 2100 row (x = 2075)
  # because it sits between the other two leading entries -- confirm
  # against the plotted data.
  measurement = case_when(
    name_lab == "+ 63 %" ~ 2776948,
    name_lab == "- 8 %" & condition == 2075 ~ 1600170,
    name_lab == "- 42 %" ~ 1039655,
    name_lab == "+ 123 %" ~ 803083.2,
    name_lab == "+ 60 %" ~ 1133420 - 40000,
    name_lab == "+ 24 %" ~ 1289317,
    name_lab == "+ 20 %" ~ 1533439 + 40000,
    name_lab == "- 8 %" ~ 1068783,
    name_lab == "+ 90 %" ~ 1753968
  ),
  # Only the right-most label position (x = 2075) carries the region name.
  namecountry = if_else(condition == 2075, countryregion, NA_character_)
)

# -------------------------------------------------------------------
# Prepare Data for Figure 2D
# -------------------------------------------------------------------

# Figure 2D plots the 60+ population without the 'Rest of World' aggregate.
fig2d_data <- over60 %>% filter(countryregion != "Rest of World")

# Label positions for Figure 2D. The columns are computed in order, so
# `namecountry` below sees the already-remapped `condition`.
fig2d_labels <- fig2d_data %>% mutate(
  # x position of each label: 2020 -> 2005, 2050 -> 2035, 2100 -> 2075;
  # 1990 rows (and any other value) get no label position.
  # NA_real_ (rather than a bare logical NA) keeps every branch numeric,
  # which dplyr versions before 1.1.0 require.
  condition = case_when(
    condition == 1990 ~ NA_real_,
    condition == 2020 ~ 2005,
    condition == 2050 ~ 2035,
    condition == 2100 ~ 2075
  ),
  # y position of each label, looked up by its (unique) text; rows without
  # a matching label get NA.
  measurement = case_when(
    name_lab == "+ 313 %" ~ 400078.6,
    name_lab == "+ 59 %" ~ 449769.9 + 20000,
    name_lab == "- 29 %" ~ 435510.7,
    name_lab == "+ 194 %" ~ 104476.2,
    name_lab == "+ 144 %" ~ 244946.5,
    name_lab == "+ 100 %" ~ 381784.9,
    name_lab == "+ 121 %" ~ 38513.54,
    name_lab == "+ 155 %" ~ 99004.16,
    name_lab == "+ 171 %" ~ 173914.6
  ),
  # Only the right-most label position (x = 2075) carries the region name.
  namecountry = if_else(condition == 2075, countryregion, NA_character_)
)